from scrapy.spiders import Spider
from scrapy import Request
from ..items import BooksItem


class BooksSpider(Spider):
    """Spider that scrapes book listings from books.toscrape.com.

    Starts at the first page of the ``books_1`` category listing, yields one
    ``BooksItem`` per ``<li>`` entry of the listing, and follows the "next"
    pagination link until a page has none.
    """

    name = 'books'

    # Initial request: supplies the starting URL of the crawl.
    def start_requests(self):
        """Yield the request for the first listing page."""
        url = 'http://books.toscrape.com/catalogue/category/books_1/index.html'
        yield Request(url)

    # Callback that parses one listing page.
    def parse(self, response, **kwargs):
        """Extract every book on a listing page, then follow pagination.

        Args:
            response: The downloaded listing page.
            **kwargs: Accepted for signature compatibility; not used here.

        Yields:
            One ``BooksItem`` per listing entry with the keys ``name``,
            ``price``, ``rate``, ``availability`` and ``img_url`` (a field is
            ``None`` when its node is missing from the page), followed by a
            ``Request`` for the next page when the page links to one.
        """
        for selector in response.xpath('//ol[@class="row"]/li'):
            # Resolve the shared prefixes once instead of repeating them in
            # every field's XPath.
            article = selector.xpath('article[@class="product_pod"]')
            price_box = article.xpath('div[@class="product_price"]')

            # Availability is taken from the second text node of the second
            # <p> in the price box (presumably the first node is layout
            # whitespace -- confirm against the page markup).
            availability_texts = price_box.xpath('p[2]/text()').getall()
            img_src = article.xpath(
                'div[@class="image_container"]/a/img/@src'
            ).get()

            # Create a fresh item for every book. Re-yielding one shared,
            # mutated instance would make all yielded items alias the same
            # object, so anything holding on to an item would see it change.
            item = BooksItem()
            item["name"] = article.xpath('h3/a/@title').get()
            item["price"] = price_box.xpath('p[1]/text()').get()
            # First class attribute found on/under the article's direct <p>.
            item["rate"] = article.xpath('p//@class').get()
            item["availability"] = (
                availability_texts[1] if len(availability_texts) > 1 else None
            )
            # urljoin resolves relative ("../..") and absolute sources alike
            # against the page URL, replacing the manual split/concatenate.
            item["img_url"] = (
                response.urljoin(img_src) if img_src is not None else None
            )
            # Disabled detail-page crawl, kept for reference:
            # book_url = selector.xpath('article[@class="product_pod"]/h3/a/@href').extract()[0]
            # book_url = book_url.split("..")[-1]
            # book_url = "http://books.toscrape.com/catalogue" + book_url
            # yield scrapy.Request(book_url, callback=self.parse_book)
            yield item

        # Follow pagination; the href is resolved relative to the current
        # page rather than against a hard-coded category URL.
        next_href = response.xpath('//li[@class="next"]/a/@href').get()
        if next_href:
            yield Request(response.urljoin(next_href))

    # def parse_book(self, response):
    #     upc = response.xpath('//article[@class="product_page"]/table/tbody/tr[1]/td/text()').extract_first()
    #     item = BooksItem()
    #     item["upc"] = upc
    #     return item